This notebook will show the index of association across generations at intervals of 1,000 generations. In order to do this, all simulated files were uploaded to the CGRB infastructure and were analyzed with analyze_and_save_ia.R with no permutations. These were then saved in a directory called ia_over_generations/. The data for these values is in a directory on the infastructure called ia_over_generations_data/, but is quite large (151GB).
library('zksimanalysis') # Main package for analysis
library('magrittr') # For the %T>%
library('dplyr') # Magic data manipulation
library('tidyr') # more magic
library('purrr') # magic dealing with lists
library('ggplot2')
library('gtable')
library('grid')
library('viridis')
The data were downloaded from the CGRB server using rsync with the following command.
Note: replace XXXXX with the port number. I am not writing it here for security reasons.
rsync --update -tavz -e "ssh -p XXXXX" --exclude 'twenty_loci_results' \
kamvarz@files.cgrb.oregonstate.edu:/nfs1/BPP/Grunwald_Lab/home/kamvarz/simulation_analysis/results/ \
../simulation_results/
First the data from the first round of simulations with 6-10 alleles per locus.
datfiles <- dir(here::here("data", "ia_over_generations_n1000/"), full.names = TRUE)
locfiles <- dir(here::here("data", "locus_diversity_over_generations_n1000/"), full.names = TRUE)
datnames <- setNames(datfiles, datfiles)
locnames <- setNames(locfiles, locfiles)
for (i in datfiles){
datnames[i] <- load(i)
}
for (i in locfiles){
locnames[i] <- load(i)
}
account_for_source <- . %>%
map_df(get, .id = "source") %>% # map the `get()` function and return a tibble
mutate(mutation_rate = ifelse(grepl("mutant", source), "even", "uneven")) %>%
mutate(source = gsub("(^.+?feather).*$", "\\1.DATA.rda", source))
ex_run <- "high_mutation([0-9]+?)/"
ex_seed <- "seed_([0-9]+?)_"
ex_sex <- "sex_([0-9.]+?)_"
ex_gen <- "gen_([0-9]+?)_"
ex_rep <- "rep_([0-9]+?).pop"
ex_samp <- "_sam_([0-9]+?)$"
datalist <- datnames %>%
setNames(datnames) %>% # set the names for binding id
account_for_source %>%
tidyr::extract(pop, c("run", "seed", "sexrate", "gen", "rep", "sample"),
paste0(ex_run, ex_seed, ex_sex, ex_gen, ex_rep, ex_samp),
remove = FALSE) %>%
mutate(sample = factor(sample, unique(sample))) # Transform the sample to a factor
datalist <- locnames %>%
setNames(locnames) %>% # set the names for binding id
account_for_source %>%
inner_join(datalist, by = c("pop", "source", "mutation_rate"))
rm(list = datnames)
rm(list = locnames)
gc()
datalist
datfiles <- dir(here::here("data", "ia_over_generations_n1000_more_alleles/"), full.names = TRUE)
locfiles <- dir(here::here("data", "locus_diversity_over_generations_n1000_more_alleles/"), full.names = TRUE)
datnames <- setNames(datfiles, datfiles)
locnames <- setNames(locfiles, locfiles)
for (i in datfiles){
datnames[i] <- load(i)
}
for (i in locfiles){
locnames[i] <- load(i)
}
account_for_source <- . %>%
map_df(get, .id = "source") %>% # map the `get()` function and return a tibble
mutate(mutation_rate = ifelse(grepl("mutant", source), "even", "uneven")) %>%
mutate(source = gsub("(^.+?feather).*$", "\\1.DATA.rda", source))
ex_run <- "more_alleles_high_mutation([0-9]+?)/"
ex_seed <- "seed_([0-9]+?)_"
ex_sex <- "sex_([0-9.]+?)_"
ex_gen <- "gen_([0-9]+?)_"
ex_rep <- "rep_([0-9]+?).pop"
ex_samp <- "_sam_([0-9]+?)$"
datalist_ma <- datnames %>%
setNames(datnames) %>% # set the names for binding id
account_for_source %>%
tidyr::extract(pop, c("run", "seed", "sexrate", "gen", "rep", "sample"),
paste0(ex_run, ex_seed, ex_sex, ex_gen, ex_rep, ex_samp),
remove = FALSE) %>%
mutate(sample = factor(sample, unique(sample))) # Transform the sample to a factor
datalist_ma <- locnames %>%
setNames(locnames) %>% # set the names for binding id
account_for_source %>%
inner_join(datalist_ma, by = c("pop", "source", "mutation_rate"))
rm(list = datnames)
rm(list = locnames)
gc()
datalist_ma
Now that we have the data, we can visualize the stability of the simulations over generations.
old_theme <- theme_set(theme_bw(base_size = 16, base_family = "Helvetica"))
old_theme <- theme_update(axis.text.x = element_text(angle = 90, vjust = 0.5))
sample_colors <- scale_color_viridis(discrete = TRUE, direction = -1, option = "D")
iaplot <- datalist %>%
mutate(gen = round(as.numeric(gen), -3)/1000) %>%
mutate(runseed = paste(run, seed)) %>%
group_by(sexrate, gen, sample, mutation_rate, runseed) %>%
summarize(rbarD = mean(rbarD, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(aes(x = gen, y = rbarD, group = runseed, color = runseed)) +
geom_line(alpha = 0.25) +
facet_grid(mutation_rate ~ sexrate) +
scale_x_continuous(breaks = c(2, 4, 6, 8)) +
sample_colors +
theme(text = element_text(size = 16)) +
theme(strip.text = element_text(size = 16)) +
theme(aspect.ratio = 0.75) +
theme(panel.spacing = unit(0, "line")) +
theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
theme(legend.position = "none") +
labs(list(
y = expression(bar(r)[d]),
x = "generation (x1000)",
title = "Index of association over 10,000 generations",
subtitle = "6 to 10 alleles per locus",
caption = "\nValues for 100 unique seed summarized over 10 replicates"
))
iaplot_ma <- datalist_ma %>%
mutate(gen = round(as.numeric(gen), -3)/1000) %>%
mutate(runseed = paste(run, seed)) %>%
group_by(sexrate, gen, sample, mutation_rate, runseed) %>%
summarize(rbarD = mean(rbarD, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(aes(x = gen, y = rbarD, group = runseed, color = runseed)) +
geom_line(alpha = 0.25) +
facet_grid(mutation_rate ~ sexrate) +
scale_x_continuous(breaks = c(2, 4, 6, 8)) +
sample_colors +
theme(text = element_text(size = 16)) +
theme(strip.text = element_text(size = 16)) +
theme(aspect.ratio = 0.75) +
theme(panel.spacing = unit(0, "line")) +
theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
theme(legend.position = "none") +
labs(list(
y = expression(bar(r)[d]),
x = "generation (x1000)",
title = "Index of association over 10,000 generations",
subtitle = "10 to 20 alleles per locus",
caption = "\nValues for 100 unique seed summarized over 10 replicates"
))
iaplot
iaplot_ma
If we looked at allelic diversity statistics:
Hexpplot <- datalist %>%
mutate(gen = round(as.numeric(gen), -3)/1000) %>%
mutate(runseed = paste(run, seed)) %>%
group_by(sexrate, gen, sample, mutation_rate, runseed) %>%
summarize(Hexp = mean(Hexp, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(aes(x = gen, y = Hexp, group = runseed, color = runseed)) +
geom_line(alpha = 0.25) +
facet_grid(mutation_rate ~ sexrate) +
scale_x_continuous(breaks = c(2, 4, 6, 8)) +
sample_colors +
theme(text = element_text(size = 16)) +
theme(strip.text = element_text(size = 16)) +
theme(aspect.ratio = 0.75) +
theme(panel.spacing = unit(0, "line")) +
theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
theme(legend.position = "none") +
labs(list(
y = expression(H[exp]),
x = "generation (x1000)",
title = "Nei's Gene Diversity over 10,000 generations",
subtitle = "6 to 10 alleles per locus",
caption = "\nValues for 100 unique seed summarized over 10 replicates"
))
Hexpplot
Hexpplot_ma <- datalist_ma %>%
mutate(gen = round(as.numeric(gen), -3)/1000) %>%
mutate(runseed = paste(run, seed)) %>%
group_by(sexrate, gen, sample, mutation_rate, runseed) %>%
summarize(Hexp = mean(Hexp, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(aes(x = gen, y = Hexp, group = runseed, color = runseed)) +
geom_line(alpha = 0.25) +
facet_grid(mutation_rate ~ sexrate) +
scale_x_continuous(breaks = c(2, 4, 6, 8)) +
sample_colors +
theme(text = element_text(size = 16)) +
theme(strip.text = element_text(size = 16)) +
theme(aspect.ratio = 0.75) +
theme(panel.spacing = unit(0, "line")) +
theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
theme(legend.position = "none") +
labs(list(
y = expression(H[exp]),
x = "generation (x1000)",
title = "Nei's Gene Diversity over 10,000 generations",
subtitle = "10 to 20 alleles per locus",
caption = "\nValues for 100 unique seed summarized over 10 replicates"
))
Hexpplot_ma
Evenplot <- datalist %>%
mutate(gen = round(as.numeric(gen), -3)/1000) %>%
mutate(runseed = paste(run, seed)) %>%
group_by(sexrate, gen, sample, mutation_rate, runseed) %>%
summarize(Evenness = mean(Evenness, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(aes(x = gen, y = Evenness, group = runseed, color = runseed)) +
geom_line(alpha = 0.25) +
facet_grid(mutation_rate ~ sexrate) +
scale_x_continuous(breaks = c(2, 4, 6, 8)) +
sample_colors +
theme(text = element_text(size = 16)) +
theme(strip.text = element_text(size = 16)) +
theme(aspect.ratio = 0.75) +
theme(panel.spacing = unit(0, "line")) +
theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
theme(legend.position = "none") +
labs(list(
y = expression(E[5][A]),
x = "generation (x1000)",
title = "Evenness over 10,000 generations",
subtitle = "6 to 10 alleles per locus",
caption = "\nValues for 100 unique seed summarized over 10 replicates"
))
Evenplot
Evenplot_ma <- datalist_ma %>%
mutate(gen = round(as.numeric(gen), -3)/1000) %>%
mutate(runseed = paste(run, seed)) %>%
group_by(sexrate, gen, sample, mutation_rate, runseed) %>%
summarize(Evenness = mean(Evenness, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(aes(x = gen, y = Evenness, group = runseed, color = runseed)) +
geom_line(alpha = 0.25) +
facet_grid(mutation_rate ~ sexrate) +
scale_x_continuous(breaks = c(2, 4, 6, 8)) +
sample_colors +
theme(text = element_text(size = 16)) +
theme(strip.text = element_text(size = 16)) +
theme(aspect.ratio = 0.75) +
theme(panel.spacing = unit(0, "line")) +
theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
theme(legend.position = "none") +
labs(list(
y = expression(E[5][A]),
x = "generation (x1000)",
title = "Evenness over 10,000 generations",
subtitle = "10 to 20 alleles per locus",
caption = "\nValues for 100 unique seed summarized over 10 replicates"
))
Evenplot_ma
Alleleplot <- datalist %>%
mutate(gen = round(as.numeric(gen), -3)/1000) %>%
mutate(runseed = paste(run, seed)) %>%
group_by(sexrate, gen, sample, mutation_rate, runseed) %>%
summarize(allele = mean(allele, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(aes(x = gen, y = allele, group = runseed, color = runseed)) +
geom_line(alpha = 0.25) +
facet_grid(mutation_rate ~ sexrate) +
scale_x_continuous(breaks = c(2, 4, 6, 8)) +
sample_colors +
theme(text = element_text(size = 16)) +
theme(strip.text = element_text(size = 16)) +
theme(aspect.ratio = 0.75) +
theme(panel.spacing = unit(0, "line")) +
theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
theme(legend.position = "none") +
labs(list(
y = "Mean number of alleles",
x = "generation (x1000)",
title = "Mean number of alleles over 10,000 generations",
subtitle = "6 to 10 alleles per locus",
caption = "\nValues for 100 unique seed summarized over 10 replicates"
))
Alleleplot
Alleleplot_ma <- datalist_ma %>%
mutate(gen = round(as.numeric(gen), -3)/1000) %>%
mutate(runseed = paste(run, seed)) %>%
group_by(sexrate, gen, sample, mutation_rate, runseed) %>%
summarize(allele = mean(allele, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(aes(x = gen, y = allele, group = runseed, color = runseed)) +
geom_line(alpha = 0.25) +
facet_grid(mutation_rate ~ sexrate) +
scale_x_continuous(breaks = c(2, 4, 6, 8)) +
sample_colors +
theme(text = element_text(size = 16)) +
theme(strip.text = element_text(size = 16)) +
theme(aspect.ratio = 0.75) +
theme(panel.spacing = unit(0, "line")) +
theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
theme(legend.position = "none") +
labs(list(
y = "Mean number of alleles",
x = "generation (x1000)",
title = "Mean number of alleles over 10,000 generations",
subtitle = "10 to 20 alleles per locus",
caption = "\nValues for 100 unique seed summarized over 10 replicates"
))
Alleleplot_ma
options(width = 100)
devtools::session_info()
## Session info --------------------------------------------------------------------------------------
## setting value
## version R version 3.4.1 (2017-06-30)
## system x86_64, darwin15.6.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## tz America/Chicago
## date 2017-08-26
## Packages ------------------------------------------------------------------------------------------
## package * version date source
## ade4 * 1.7-8 2017-08-09 cran (@1.7-8)
## adegenet * 2.1.0 2017-07-17 local
## ape 4.1 2017-02-14 CRAN (R 3.4.0)
## assertthat 0.2.0 2017-04-11 CRAN (R 3.4.0)
## backports 1.1.0 2017-05-22 CRAN (R 3.4.0)
## base * 3.4.1 2017-07-07 local
## bindr 0.1 2016-11-13 CRAN (R 3.4.0)
## bindrcpp * 0.2 2017-06-17 CRAN (R 3.4.0)
## bitops 1.0-6 2013-08-17 CRAN (R 3.4.0)
## boot 1.3-20 2017-07-30 CRAN (R 3.4.1)
## broom 0.4.2 2017-02-13 CRAN (R 3.4.0)
## caTools * 1.17.1 2014-09-10 CRAN (R 3.4.0)
## cellranger 1.1.0 2016-07-27 CRAN (R 3.4.0)
## cluster 2.0.6 2017-03-16 CRAN (R 3.4.0)
## coda 0.19-1 2016-12-08 CRAN (R 3.4.0)
## colorspace 1.3-2 2016-12-14 CRAN (R 3.4.0)
## compiler 3.4.1 2017-07-07 local
## datasets * 3.4.1 2017-07-07 local
## deldir 0.1-14 2017-04-22 CRAN (R 3.4.0)
## devtools 1.13.3 2017-08-02 CRAN (R 3.4.1)
## digest 0.6.12 2017-01-27 CRAN (R 3.4.0)
## dplyr * 0.7.2 2017-07-20 CRAN (R 3.4.1)
## evaluate 0.10.1 2017-06-24 CRAN (R 3.4.1)
## expm 0.999-2 2017-03-29 CRAN (R 3.4.0)
## fastmatch 1.1-0 2017-01-28 CRAN (R 3.4.0)
## feather * 0.3.1 2016-11-09 CRAN (R 3.4.0)
## flux * 0.3-0 2014-04-25 CRAN (R 3.4.0)
## forcats 0.2.0 2017-01-23 CRAN (R 3.4.0)
## foreign 0.8-69 2017-06-21 CRAN (R 3.4.0)
## gdata 2.18.0 2017-06-06 CRAN (R 3.4.0)
## ggplot2 * 2.2.1 2016-12-30 CRAN (R 3.4.0)
## glue 1.1.1 2017-06-21 CRAN (R 3.4.0)
## gmodels 2.16.2 2015-07-22 CRAN (R 3.4.0)
## graphics * 3.4.1 2017-07-07 local
## grDevices * 3.4.1 2017-07-07 local
## grid * 3.4.1 2017-07-07 local
## gridExtra 2.2.1 2016-02-29 CRAN (R 3.4.0)
## gtable * 0.2.0 2016-02-26 CRAN (R 3.4.0)
## gtools 3.5.0 2015-05-29 CRAN (R 3.4.0)
## haven 1.1.0 2017-07-09 CRAN (R 3.4.1)
## here 0.1 2017-05-28 CRAN (R 3.4.0)
## hms 0.3 2016-11-22 CRAN (R 3.4.0)
## htmltools 0.3.6 2017-04-28 CRAN (R 3.4.0)
## httpuv 1.3.5 2017-07-04 CRAN (R 3.4.1)
## httr 1.2.1 2016-07-03 CRAN (R 3.4.0)
## igraph 1.1.2 2017-07-21 cran (@1.1.2)
## jsonlite 1.5 2017-06-01 CRAN (R 3.4.0)
## knitr 1.16 2017-05-18 CRAN (R 3.4.0)
## labeling 0.3 2014-08-23 CRAN (R 3.4.0)
## lattice 0.20-35 2017-03-25 CRAN (R 3.4.0)
## lazyeval 0.2.0 2016-06-12 CRAN (R 3.4.0)
## LearnBayes 2.15 2014-05-29 CRAN (R 3.4.0)
## lubridate 1.6.0 2016-09-13 CRAN (R 3.4.0)
## magrittr * 1.5 2014-11-22 CRAN (R 3.4.0)
## MASS 7.3-47 2017-04-21 CRAN (R 3.4.0)
## Matrix 1.2-10 2017-04-28 CRAN (R 3.4.0)
## memoise 1.1.0 2017-04-21 CRAN (R 3.4.0)
## methods * 3.4.1 2017-07-07 local
## mgcv 1.8-18 2017-07-28 CRAN (R 3.4.1)
## mime 0.5 2016-07-07 CRAN (R 3.4.0)
## mnormt 1.5-5 2016-10-15 CRAN (R 3.4.0)
## modelr 0.1.1 2017-07-24 CRAN (R 3.4.1)
## munsell 0.4.3 2016-02-13 CRAN (R 3.4.0)
## nlme 3.1-131 2017-02-06 CRAN (R 3.4.0)
## parallel 3.4.1 2017-07-07 local
## pegas 0.10 2017-05-03 CRAN (R 3.4.0)
## permute 0.9-4 2016-09-09 CRAN (R 3.4.0)
## phangorn 2.2.0 2017-04-03 CRAN (R 3.4.0)
## pinfsc50 1.1.0 2016-12-02 CRAN (R 3.4.0)
## pkgconfig 2.0.1 2017-03-21 CRAN (R 3.4.0)
## plyr 1.8.4 2016-06-08 CRAN (R 3.4.0)
## poppr * 2.4.1.99-2 2017-08-27 Github (grunwaldlab/poppr@cd4cba2)
## poweRlaw * 0.70.0 2016-12-22 CRAN (R 3.4.0)
## psych 1.7.5 2017-05-03 CRAN (R 3.4.0)
## purrr * 0.2.3 2017-08-02 CRAN (R 3.4.1)
## quadprog 1.5-5 2013-04-17 CRAN (R 3.4.0)
## R6 2.2.2 2017-06-17 cran (@2.2.2)
## Rcpp 0.12.12 2017-07-15 cran (@0.12.12)
## readr * 1.1.1 2017-05-16 CRAN (R 3.4.0)
## readxl 1.0.0 2017-04-18 CRAN (R 3.4.0)
## reshape2 1.4.2 2016-10-22 CRAN (R 3.4.0)
## rlang 0.1.1 2017-05-18 CRAN (R 3.4.0)
## rmarkdown 1.6 2017-06-15 cran (@1.6)
## rprojroot 1.2 2017-01-16 CRAN (R 3.4.0)
## rvest 0.3.2 2016-06-17 CRAN (R 3.4.0)
## scales 0.4.1.9002 2017-08-02 Github (hadley/scales@842ad87)
## seqinr 3.4-5 2017-08-01 CRAN (R 3.4.1)
## shiny 1.0.5 2017-08-23 cran (@1.0.5)
## sp 1.2-5 2017-06-29 CRAN (R 3.4.1)
## spdep 0.6-13 2017-04-25 CRAN (R 3.4.0)
## splines 3.4.1 2017-07-07 local
## stats * 3.4.1 2017-07-07 local
## stats4 3.4.1 2017-07-07 local
## stringi 1.1.5 2017-04-07 CRAN (R 3.4.0)
## stringr 1.2.0 2017-02-18 CRAN (R 3.4.0)
## tibble * 1.3.3 2017-05-28 CRAN (R 3.4.0)
## tidyr * 0.6.3 2017-05-15 CRAN (R 3.4.0)
## tidyverse * 1.1.1 2017-01-27 CRAN (R 3.4.0)
## tools 3.4.1 2017-07-07 local
## utils * 3.4.1 2017-07-07 local
## vcfR * 1.5.0 2017-05-18 cran (@1.5.0)
## vegan 2.4-4 2017-08-24 cran (@2.4-4)
## VGAM 1.0-4 2017-07-25 CRAN (R 3.4.1)
## viridis * 0.4.0 2017-03-27 CRAN (R 3.4.0)
## viridisLite * 0.2.0 2017-03-24 CRAN (R 3.4.0)
## withr 2.0.0 2017-07-28 CRAN (R 3.4.1)
## xml2 1.1.1 2017-01-24 CRAN (R 3.4.0)
## xtable 1.8-2 2016-02-05 CRAN (R 3.4.0)
## yaml 2.1.14 2016-11-12 CRAN (R 3.4.0)
## zksimanalysis * 0.9.0.9000 2017-08-26 local